{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Ch `07`: Concept `03`"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "deletable": true,
    "editable": true
   },
   "source": [
    "## Denoising autoencoder"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "source": [
    "A denoising autoencoder is pretty much the same architecture as a normal autoencoder. The input is noised up, and cost function tries to denoise it by minimizing the construction error from denoised input to clean output."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": [
    "import tensorflow as tf\n",
    "import numpy as np\n",
    "import time\n",
    "\n",
    "def get_batch(X, Xn, size):\n",
    "    a = np.random.choice(len(X), size, replace=False)\n",
    "    return X[a], Xn[a]\n",
    "\n",
    "class Denoiser:\n",
    "\n",
    "    def __init__(self, input_dim, hidden_dim, epoch=10000, batch_size=50, learning_rate=0.001):\n",
    "        self.epoch = epoch\n",
    "        self.batch_size = batch_size\n",
    "        self.learning_rate = learning_rate\n",
    "\n",
    "        self.x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')\n",
    "        self.x_noised = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_noised')\n",
    "        with tf.name_scope('encode'):\n",
    "            self.weights1 = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')\n",
    "            self.biases1 = tf.Variable(tf.zeros([hidden_dim]), name='biases')\n",
    "            self.encoded = tf.nn.sigmoid(tf.matmul(self.x_noised, self.weights1) + self.biases1, name='encoded')\n",
    "        with tf.name_scope('decode'):\n",
    "            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')\n",
    "            biases = tf.Variable(tf.zeros([input_dim]), name='biases')\n",
    "            self.decoded = tf.matmul(self.encoded, weights) + biases\n",
    "        self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.x, self.decoded))))\n",
    "        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)\n",
    "        self.saver = tf.train.Saver()\n",
    "\n",
    "    def add_noise(self, data):\n",
    "        noise_type = 'mask-0.2'\n",
    "        if noise_type == 'gaussian':\n",
    "            n = np.random.normal(0, 0.1, np.shape(data))\n",
    "            return data + n\n",
    "        if 'mask' in noise_type:\n",
    "            frac = float(noise_type.split('-')[1])\n",
    "            temp = np.copy(data)\n",
    "            for i in temp:\n",
    "                n = np.random.choice(len(i), round(frac * len(i)), replace=False)\n",
    "                i[n] = 0\n",
    "            return temp\n",
    "\n",
    "    def train(self, data):\n",
    "        data_noised = self.add_noise(data)\n",
    "        with open('log.csv', 'w') as writer:\n",
    "            with tf.Session() as sess:\n",
    "                sess.run(tf.global_variables_initializer())\n",
    "                for i in range(self.epoch):\n",
    "                    for j in range(50):\n",
    "                        batch_data, batch_data_noised = get_batch(data, data_noised, self.batch_size)\n",
    "                        l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data, self.x_noised: batch_data_noised})\n",
    "                    if i % 10 == 0:\n",
    "                        print('epoch {0}: loss = {1}'.format(i, l))\n",
    "                        self.saver.save(sess, './model.ckpt')\n",
    "                        epoch_time = int(time.time())\n",
    "                        row_str = str(epoch_time) + ',' + str(i) + ',' + str(l) + '\\n'\n",
    "                        writer.write(row_str)\n",
    "                        writer.flush()\n",
    "                self.saver.save(sess, './model.ckpt')\n",
    "\n",
    "    def test(self, data):\n",
    "        with tf.Session() as sess:\n",
    "            self.saver.restore(sess, './model.ckpt')\n",
    "            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})\n",
    "        print('input', data)\n",
    "        print('compressed', hidden)\n",
    "        print('reconstructed', reconstructed)\n",
    "        return reconstructed\n",
    "\n",
    "    def get_params(self):\n",
    "        with tf.Session() as sess:\n",
    "            self.saver.restore(sess, './model.ckpt')\n",
    "            weights, biases = sess.run([self.weights1, self.biases1])\n",
    "        return weights, biases"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "deletable": true,
    "editable": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
